From 01fe2a3416a885b828c62080a9f83b80ee3c5fb9 Mon Sep 17 00:00:00 2001 From: "mjw@wray-m-3.hpl.hp.com" Date: Mon, 26 Jul 2004 09:32:17 +0000 Subject: [PATCH] bitkeeper revision 1.1108.14.1 (4104cfa1bp3FkfR2vYJwebaywi6JAg) Stop a domain restarting if it's too soon since the last one. --- tools/python/xen/xend/XendDomain.py | 77 +++++++++++++------------ tools/python/xen/xend/XendDomainInfo.py | 63 +++++++++++++++----- 2 files changed, 88 insertions(+), 52 deletions(-) diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py index f44b5c1ad5..6f2ac91928 100644 --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -130,9 +130,11 @@ class XendDomain: def initial_refresh(self): """Refresh initial domain info from domain_db. """ - #for d in self.domain_db.values(): print 'db dom=', d + + def cb_all_ok(val): + self.refresh() + domlist = xc.domain_getinfo() - #for d in domlist: print 'xc dom=', d doms = {} for d in domlist: domid = str(d['dom']) @@ -140,22 +142,13 @@ class XendDomain: dlist = [] for config in self.domain_db.values(): domid = str(sxp.child_value(config, 'id')) - #print "dom=", domid, "config=", config if domid in doms: - #print "dom=", domid, "new" - deferred = self._new_domain(config, doms[domid]) - dlist.append(deferred) + d_dom = self._new_domain(config, doms[domid]) + dlist.append(d_dom) else: - #print "dom=", domid, "del" self._delete_domain(domid) - deferred = defer.DeferredList(dlist, fireOnOneErrback=1) - def cbok(val): - #print "doms:" - #for d in self.domain.values(): print 'dom', d - self.refresh() - #print "XendDomain>initial_refresh> doms:" - #for d in self.domain.values(): print 'dom', d - deferred.addCallback(cbok) + d_all = defer.DeferredList(dlist, fireOnOneErrback=1) + d_all.addCallback(cb_all_ok) def sync(self): """Sync domain db to disk. 
@@ -179,10 +172,13 @@ class XendDomain: @param info: domain info from xen @return: deferred """ - deferred = XendDomainInfo.vm_recreate(savedinfo, info) - def fn(dominfo): + def cbok(dominfo): self.domain[dominfo.id] = dominfo - deferred.addCallback(fn) + if dominfo.restart_pending(): + self.domain_restart_add(dominfo) + + deferred = XendDomainInfo.vm_recreate(savedinfo, info) + deferred.addCallback(cbok) return deferred def _add_domain(self, id, info, notify=1): @@ -250,9 +246,9 @@ class XendDomain: if id not in self.domain: savedinfo = None deferred = XendDomainInfo.vm_recreate(savedinfo, d) - def fn(dominfo): + def cbok(dominfo): self._add_domain(dominfo.id, dominfo) - deferred.addCallback(fn) + deferred.addCallback(cbok) # Remove entries for domains that no longer exist. for d in self.domain.values(): info = doms.get(d.id) @@ -313,11 +309,11 @@ class XendDomain: @param config: configuration @return: deferred """ - deferred = XendDomainInfo.vm_create(config) - def fn(dominfo): + def cbok(dominfo): self._add_domain(dominfo.id, dominfo) return dominfo - deferred.addCallback(fn) + deferred = XendDomainInfo.vm_create(config) + deferred.addCallback(cbok) return deferred def domain_restart(self, dominfo): @@ -326,11 +322,12 @@ class XendDomain: @param dominfo: domain object @return: deferred """ - deferred = dominfo.restart() - def fn(dominfo): + def cbok(dominfo): self._add_domain(dominfo.id, dominfo) return dominfo - deferred.addCallback(fn) + log.info("Restarting domain: id=%s name=%s", dominfo.id, dominfo.name) + deferred = dominfo.restart() + deferred.addCallback(cbok) return deferred def domain_configure(self, id, config): @@ -348,11 +345,11 @@ class XendDomain: raise XendError("Invalid domain: " + str(id)) if dominfo.config: raise XendError("Domain already configured: " + str(id)) - def fn(dominfo): + def cbok(dominfo): self._add_domain(dominfo.id, dominfo) return dominfo deferred = dominfo.construct(config) - deferred.addCallback(fn) + 
deferred.addCallback(cbok) return deferred def domain_restore(self, src, progress=0): @@ -363,11 +360,11 @@ class XendDomain: @return: deferred """ - def fn(dominfo): + def cbok(dominfo): self._add_domain(dominfo.id, dominfo) return dominfo deferred = XendDomainInfo.vm_restore(src, progress=progress) - deferred.addCallback(fn) + deferred.addCallback(cbok) return deferred def domain_get(self, id): @@ -439,9 +436,12 @@ class XendDomain: restart = (force and reason == 'reboot') or dominfo.restart_needed(reason) if restart: dominfo.restarting() - self.restarts[id] = dominfo - log.info('Scheduling restart for domain: id=%s name=%s', id, dominfo.name) - self.domain_restarts_schedule() + self.domain_restart_add(dominfo) + + def domain_restart_add(self, dominfo): + self.restarts[dominfo.id] = dominfo + log.info('Scheduling restart for domain: id=%s name=%s', dominfo.id, dominfo.name) + self.domain_restarts_schedule() def domain_restart_cancel(self, id): """Cancel any restart scheduled for a domain. @@ -450,6 +450,7 @@ class XendDomain: """ dominfo = self.restarts.get(id) if dominfo: + log.info('Cancelling restart for domain: id=%s name=%s', dominfo.id, dominfo.name) dominfo.restart_cancel() del self.restarts[id] @@ -465,18 +466,18 @@ class XendDomain: # Remove it from the restarts. del self.restarts[id] try: - log.info('domain_restarts> restart: id=%s config=%s', id, str(dominfo.config)) def cbok(dominfo): - log.info('Restarted domain %s as %s', id, dominfo.id) + log.info('Restarted domain id=%s as %s', id, dominfo.id) self.domain_unpause(dominfo.id) def cberr(err): - log.exception("Delayed exception restarting domain") + log.exception("Delayed exception restarting domain: id=%s", id) deferred = self.domain_restart(dominfo) deferred.addCallback(cbok) deferred.addErrback(cberr) except: - log.exception("Exception restarting domain") + log.exception("Exception restarting domain: id=%s", id) if len(self.restarts): + # Run again later if any restarts remain. 
self.refresh_schedule(delay=5) def final_domain_destroy(self, id): @@ -487,7 +488,7 @@ class XendDomain: dom = int(id) if dom <= 0: return 0 - log.info('Destroying domain %s', str(id)) + log.info('Destroying domain: id=%s', str(id)) eserver.inject('xend.domain.destroy', id) dominfo = self.domain.get(id) if dominfo: diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index 2a2743d3ef..f2a866bea6 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -65,6 +65,9 @@ restart_modes = [ STATE_RESTART_PENDING = 'pending' STATE_RESTART_BOOTING = 'booting' +STATE_VM_OK = "ok" +STATE_VM_TERMINATED = "terminated" + def shutdown_reason(code): """Get a shutdown reason from a code. @@ -273,7 +276,11 @@ def vm_recreate(savedinfo, info): vm.memory = info['mem_kb']/1024 start_time = sxp.child_value(savedinfo, 'start_time') if start_time is not None: - vm.startTime = float(start_time) + vm.start_time = float(start_time) + vm.restart_state = sxp.child_value(savedinfo, 'restart_state') + restart_time = sxp.child_value(savedinfo, 'restart_time') + if restart_time is not None: + vm.restart_time = float(restart_time) config = sxp.child_value(savedinfo, 'config') if config: d = vm.construct(config) @@ -339,15 +346,16 @@ def _vm_configure2(val, vm): class XendDomainInfo: """Virtual machine object.""" - STATE_OK = "ok" - STATE_TERMINATED = "terminated" + """Minimum time between domain restarts in seconds. 
+ """ + MINIMUM_RESTART_TIME = 10 def __init__(self): self.recreate = 0 self.config = None self.id = None self.dom = None - self.startTime = None + self.start_time = None self.name = None self.memory = None self.image = None @@ -361,11 +369,12 @@ class XendDomainInfo: self.blkif_backend = 0 self.netif_backend = 0 #todo: state: running, suspended - self.state = self.STATE_OK + self.state = STATE_VM_OK #todo: set to migrate info if migrating self.migrate = None self.restart_mode = RESTART_ONREBOOT self.restart_state = None + self.restart_time = None self.console_port = None def setdom(self, dom): @@ -412,13 +421,17 @@ class XendDomainInfo: sxpr.append(['cpu', self.info['cpu']]) sxpr.append(['cpu_time', self.info['cpu_time']/1e9]) - if self.startTime: - upTime = time.time() - self.startTime - sxpr.append(['up_time', str(upTime) ]) - sxpr.append(['start_time', str(self.startTime) ]) + if self.start_time: + up_time = time.time() - self.start_time + sxpr.append(['up_time', str(up_time) ]) + sxpr.append(['start_time', str(self.start_time) ]) if self.console: sxpr.append(self.console.sxpr()) + if self.restart_state: + sxpr.append(['restart_state', self.restart_state]) + if self.restart_time: + sxpr.append(['restart_time', str(self.restart_time)]) if self.config: sxpr.append(['config', self.config]) return sxpr @@ -551,13 +564,13 @@ class XendDomainInfo: def cleanup(self): """Cleanup vm resources: release devices. """ - self.state = self.STATE_TERMINATED + self.state = STATE_VM_TERMINATED self.release_devices() def is_terminated(self): """Check if a domain has been terminated. """ - return self.state == self.STATE_TERMINATED + return self.state == STATE_VM_TERMINATED def release_devices(self): """Release all vm devices. 
@@ -617,8 +630,8 @@ class XendDomainInfo: log.debug('init_domain> Created domain=%d name=%s memory=%d', dom, name, memory) self.setdom(dom) - if self.startTime is None: - self.startTime = time.time() + if self.start_time is None: + self.start_time = time.time() def build_domain(self, ostype, kernel, ramdisk, cmdline, vifs_n): """Build the domain boot image. @@ -628,7 +641,6 @@ class XendDomainInfo: log.warning('kernel cmdline too long, domain %d', self.dom) dom = self.dom buildfn = getattr(xc, '%s_build' % ostype) - #print 'build_domain>', ostype, dom, kernel, cmdline, ramdisk flags = 0 if self.netif_backend: flags |= SIF_NET_BE_DOMAIN if self.blkif_backend: flags |= SIF_BLK_BE_DOMAIN @@ -753,8 +765,31 @@ class XendDomainInfo: def restarting(self): self.restart_state = STATE_RESTART_PENDING + def restart_pending(self): + return self.restart_state == STATE_RESTART_PENDING + + def restart_check(self): + """Check if domain restart is OK. + To prevent restart loops, raise an error if it is + less than MINIMUM_RESTART_TIME seconds since the last restart. + """ + tnow = time.time() + if self.restart_time is not None: + tdelta = tnow - self.restart_time + if tdelta < self.MINIMUM_RESTART_TIME: + msg = 'VM %d restarting too fast' % self.dom + log.error(msg) + raise VmError(msg) + self.restart_time = tnow + def restart(self): + """Restart the domain after it has exited. + Reuses the domain id and console port. + + @return: deferred + """ try: + self.restart_check() self.restart_state = STATE_RESTART_BOOTING d = self.construct(self.config) finally: -- 2.30.2